output: pdf_document

Geographic Data

In this work DAE devices distribution is studied. DAE stands to automated external defibrillator, which are useful in case of cardiac emergencies, while time is crucial, so the closeness to one of this devices.

Set work directory, install and load libraries

setwd(dirname(rstudioapi::getActiveDocumentContext()$path))

# Libraries
# install.packages("tidyverse")
# install.packages("sp")
# install.packages("sf")
# install.packages("rgdal") # DEPRECATED
# install.packages("rmapshaper")
# install.packages("wbstats")
# install.packages("rnaturalearth")
# install.packages("mapview")
# install.packages("spatialreg")
# install.packages("spatstat")

library(sp)
library(sf)
## Linking to GEOS 3.10.2, GDAL 3.4.2, PROJ 8.2.1; sf_use_s2() is TRUE
library(tidyverse)
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr     1.1.4     ✔ readr     2.1.4
## ✔ forcats   1.0.0     ✔ stringr   1.5.0
## ✔ ggplot2   3.4.4     ✔ tibble    3.2.1
## ✔ lubridate 1.9.2     ✔ tidyr     1.3.0
## ✔ purrr     1.0.1
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag()    masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(rgeos)
## rgeos version: 0.6-2, (SVN revision 693)
##  GEOS runtime version: 3.10.2-CAPI-1.16.0 
##  Please note that rgeos will be retired during 2023,
## plan transition to sf functions using GEOS at your earliest convenience.
##  GEOS using OverlayNG
##  Linking to sp version: 1.6-0 
##  Polygon checking: TRUE 
## 
## 
## Attaching package: 'rgeos'
## 
## The following object is masked from 'package:dplyr':
## 
##     symdiff
library(maptools)
## Checking rgeos availability: TRUE
## Please note that 'maptools' will be retired during 2023,
## plan transition at your earliest convenience;
## some functionality will be moved to 'sp'.
library(tidyr)
#library(raster)
#library(rgdal) 

Data management and graphical representation

Import dataset in a dataframe and check for duplicated records

dae_full <- read.csv("progetto-dae.csv", sep = ";")

duplicated_record = duplicated(dae_full)
double_record = subset(dae_full, duplicated_record) 
print(double_record)
##  [1] Nome               Città              Indirizzo          Ubicazione        
##  [5] Note               Orari              Telefono           Geo.Point         
##  [9] Quartiere          Zona.di.prossimità Area.statistica   
## <0 rows> (or 0-length row.names)

NON CI SONO DUPLICATI SE SI CONSIDERANO LE NOTE: ES TECHNOGYM PRESSO PORTINERIA O IN PRODUZIONE, MA SCARTANDO ALCUNE VARIABILI PARE CI SIANO VALORI RIPETUTI, quindi lavorando su dae e non dae_full. Li elimino comunque considerando che sono solo 83 vaori su 5298.

Selecting specific columns in a new df, keeping only useful info

dae <- dae_full[c("Nome", "Città", "Geo.Point")]
head(dae)

Discard duplicate records

duplicated_rows = duplicated(dae)
double_data = subset(dae, duplicated_rows) 
print(double_data)
##                                                     Nome
## 44                                      ESSELUNGA S.P.A.
## 45                                      ESSELUNGA S.P.A.
## 46                                      ESSELUNGA S.P.A.
## 47                                      ESSELUNGA S.P.A.
## 48                                      ESSELUNGA S.P.A.
## 1206                                           Vertivsrl
## 1386                              VILLAGGIO DELLA SALUTE
## 1469                                        L.F.  S.P.A.
## 1511                                  BORMIOLI LUIGI SPA
## 1519                     BARILLA G. E R. FRATELLI S.P.A.
## 1520                     BARILLA G. E R. FRATELLI S.P.A.
## 1687                               GSK MANUFACTORING SPA
## 1841                                    ESSELUNGA S.P.A.
## 1842                                    ESSELUNGA S.P.A.
## 1885                                      Cattani S.P.A.
## 1890        Agenzia delle Entrate di Parma - Riscossione
## 1980              AGENZIA INTERREGIONALE PER IL FIUME PO
## 2107                                         SAIB S.P.A.
## 2346                           DUERRE TUBI STYLE (DTS 2)
## 2405               Scuola secondaria di 1° grado Borgese
## 2411 Istituto tecnico economico statale Roberto Valturio
## 2412 Istituto tecnico economico statale Roberto Valturio
## 3027               Istituto Alberghiero Savioli Riccione
## 3084                                     TECNOPRIMAF SRL
## 3125                                               FIERA
## 3138                                                CAAB
## 3144                                     DITTA DATALOGIC
## 3180                            POLISPORTIVA MONTEVEGLIO
## 3215                              POLISPORTIVA MARTORANO
## 3234                        CASSA DI RISPARMIO DI CESENA
## 3248                                           TECHNOGYM
## 3254                          ASSOCIAZIONE CALCIO CESENA
## 3268                                  BORMIOLI LUIGI SPA
## 3397                                       DATALOGIC SPA
## 3546                                 Polisportiva Stella
## 3607                                    ESSELUNGA S.P.A.
## 3608                                    ESSELUNGA S.P.A.
## 3609                                    ESSELUNGA S.P.A.
## 3610                                    ESSELUNGA S.P.A.
## 3612                                  FORNOVO GAS S.P.A.
## 3616                                            INCO SPA
## 3653                                      Cattani S.P.A.
## 3662                       AIA Stabilimento di Capoponte
## 3664                                  Val Parma Hospital
## 3679                                          CARPIGIANI
## 3748              AGENZIA INTERREGIONALE PER IL FIUME PO
## 3880                              ROSSETTI MARKET S.r.l.
## 3887                              FOSSATI SERRAMENTI SRL
## 3889                                    LA PIZZA+ 1  SPA
## 4030                 CLUB VILLAGE E HOTEL SPIAGGIA ROMEA
## 4167                     Liceo Scientifico Volta-Fellini
## 4186                               Istituto "De Gasperi"
## 4188                Medicina dello Sport AUSL (Colosseo)
## 4206                       Palestra Scuola Media Pazzini
## 4213            Stazione Carabinieri di Novafeltria (RN)
## 4375                                             IMA SPA
## 4386                          AUTOMOBILI LAMBORGHINI SPA
## 4388                          AUTOMOBILI LAMBORGHINI SPA
## 4607                          CENTRO PER L'AUTOTRASPORTO
## 4779                                           fruttagel
## 4781                                   farmacia santerno
## 4816           Stadio del nuoto Centro Sportivo Riccione
## 4865                          GRANDI SALUMIFICI ITALIANI
## 4877                               I.I.S. ANTONIO MEUCCI
## 4884                                 Arcispazio Piumazzo
## 4923                                                CAAB
## 4924                                                CAAB
## 4933                                             IMA SPA
## 4938                                                IKEA
## 5001                            OROGEL SOC.COOP.AGRICOLA
## 5002                            OROGEL SOC.COOP.AGRICOLA
## 5004                        CASSA DI RISPARMIO DI CESENA
## 5005                        CASSA DI RISPARMIO DI CESENA
## 5006                          ASSOCIAZIONE CALCIO CESENA
## 5022                                           TECHNOGYM
## 5043                                  BORMIOLI LUIGI SPA
## 5044                                  BORMIOLI LUIGI SPA
## 5045                                  BORMIOLI LUIGI SPA
## 5122                                        ISII MARCONI
## 5144                              AZIENDA FLY GLOBAL NET
## 5185                               GSK MANUFACTORING SPA
## 5186                               GSK MANUFACTORING SPA
## 5195                                 Colombini Group Spa
##                         Città                              Geo.Point
## 44                      PARMA 44.849708557128906, 10.367918014526367
## 45                      PARMA 44.849708557128906, 10.367918014526367
## 46                      PARMA 44.849708557128906, 10.367918014526367
## 47                      PARMA 44.849708557128906, 10.367918014526367
## 48                      PARMA 44.849708557128906, 10.367918014526367
## 1206 CASTEL GUELFO DI BOLOGNA  44.43602752685547, 11.617171287536621
## 1386              MONTERENZIO  44.31161880493164, 11.460027694702148
## 1469                   CESENA   44.15047836303711, 12.21621036529541
## 1511                    PARMA  44.82695007324219, 10.326359748840332
## 1519                    PARMA 44.823341369628906, 10.381290435791016
## 1520                    PARMA 44.823341369628906, 10.381290435791016
## 1687                  TORRILE    44.8971061706543, 10.35757827758789
## 1841                    PARMA 44.849708557128906, 10.367918014526367
## 1842                    PARMA 44.849708557128906, 10.367918014526367
## 1885                    PARMA  44.84624481201172, 10.363381385803223
## 1890                    PARMA  44.81496047973633, 10.312209129333496
## 1980                    PARMA 44.807926177978516, 10.329997062683105
## 2107                   CAORSO  45.042388916015625, 9.822153091430664
## 2346                MARANELLO 44.536258697509766, 10.875616073608398
## 2405                   RIMINI  44.05144500732422, 12.575874328613281
## 2411                   RIMINI   44.04925537109375, 12.58498764038086
## 2412                   RIMINI   44.04925537109375, 12.58498764038086
## 3027                 RICCIONE   44.00173568725586, 12.64654541015625
## 3084   SAN CESARIO SUL PANARO   44.58967590332031, 11.02854061126709
## 3125                  BOLOGNA  44.50872039794922, 11.366950035095215
## 3138                  BOLOGNA   44.51411056518555, 11.40820026397705
## 3144         MONTE SAN PIETRO 44.422298431396484, 11.173110008239746
## 3180              VALSAMOGGIA   44.4734992980957, 11.101129531860352
## 3215                   CESENA 44.168968200683594, 12.257120132446289
## 3234                   CESENA  44.14461135864258, 12.237059593200684
## 3248                   CESENA      44.1708984375, 12.277190208435059
## 3254                   CESENA  44.12149429321289, 12.184496879577637
## 3268                    PARMA  44.82695007324219, 10.326359748840332
## 3397        CALDERARA DI RENO    44.5435905456543, 11.30077838897705
## 3546                   RIMINI  44.04840850830078, 12.572135925292969
## 3607                    PARMA 44.849708557128906, 10.367918014526367
## 3608                    PARMA 44.849708557128906, 10.367918014526367
## 3609                    PARMA 44.849708557128906, 10.367918014526367
## 3610                    PARMA 44.849708557128906, 10.367918014526367
## 3612             TRAVERSETOLO  44.64885711669922, 10.385416984558105
## 3616                    PARMA  44.78468322753906, 10.287612915039062
## 3653                    PARMA  44.84624481201172, 10.363381385803223
## 3662        TIZZANO VAL PARMA  44.48647689819336, 10.243917465209961
## 3664               LANGHIRANO 44.617034912109375, 10.268505096435547
## 3679       ANZOLA DELL EMILIA    44.5411491394043, 11.21133041381836
## 3748                    PARMA 44.807926177978516, 10.329997062683105
## 3880                   ALSENO   44.886844635009766, 9.98708724975586
## 3887               ROTTOFRENO    45.0426139831543, 9.601079940795898
## 3889                PODENZANO  44.991371154785156, 9.690260887145996
## 4030                COMACCHIO  44.78233337402344, 12.250884056091309
## 4167                 RICCIONE    44.002685546875, 12.647295951843262
## 4186      MORCIANO DI ROMAGNA   43.91582489013672, 12.64973258972168
## 4188                   RIMINI  44.040733337402344, 12.57896900177002
## 4206                VERUCCHIO 44.009498596191406, 12.434885025024414
## 4213              NOVAFELTRIA 43.890804290771484, 12.289532661437988
## 4375       OZZANO DELL EMILIA   44.44227981567383, 11.49416732788086
## 4386     SANT AGATA BOLOGNESE  44.65670394897461, 11.122122764587402
## 4388     SANT AGATA BOLOGNESE  44.65787887573242, 11.126039505004883
## 4607                   CESENA 44.130393981933594, 12.234954833984375
## 4779                ALFONSINE  44.51097869873047, 12.042570114135742
## 4781                  RAVENNA  44.43552017211914, 12.059149742126465
## 4816                 RICCIONE  44.00365447998047, 12.637453079223633
## 4865                   MODENA       44.591796875, 10.950569152832031
## 4877                    CARPI   44.78599548339844, 10.86845874786377
## 4884      CASTELFRANCO EMILIA  44.55743408203125, 11.062333106994629
## 4923                  BOLOGNA   44.51411056518555, 11.40820026397705
## 4924                  BOLOGNA   44.51411056518555, 11.40820026397705
## 4933       OZZANO DELL EMILIA  44.442588806152344, 11.48770809173584
## 4938      CASALECCHIO DI RENO 44.487491607666016, 11.251609802246094
## 5001                   CESENA 44.167789459228516, 12.216970443725586
## 5002                   CESENA   44.17005920410156, 12.21891975402832
## 5004                   CESENA  44.14461135864258, 12.237059593200684
## 5005                   CESENA  44.14461135864258, 12.237059593200684
## 5006                   CESENA  44.14017105102539, 12.260809898376465
## 5022                   CESENA      44.1708984375, 12.277190208435059
## 5043                    PARMA  44.82695007324219, 10.326359748840332
## 5044                    PARMA  44.82695007324219, 10.326359748840332
## 5045                    PARMA  44.82695007324219, 10.326359748840332
## 5122                 PIACENZA   45.04444122314453, 9.691390037536621
## 5144                   CAORSO   45.05611038208008, 9.879170417785645
## 5185                  TORRILE    44.8971061706543, 10.35757827758789
## 5186                  TORRILE    44.8971061706543, 10.35757827758789
## 5195      CASTELLO SERRAVALLE  43.99053192138672, 12.519020080566406
#print(count(double_data))

Remove duplicates from dae

dae <- unique(dae)
#head(dae)
count(dae)

Count how many cities

unique_cities <- unique(dae$Città)
city_numb = length(unique_cities)
print(city_numb)
## [1] 326
#table(dae$Città)
barplot(table(dae$Città),main='DAE per Municipality')

Since it’s difficult to understand, select the 10 with most and less DAE.

# Count the number of occurrences of each city
city_counts <- table(dae$Città)

# Sort the cities by their counts in descending order
sorted_cities <- sort(city_counts, decreasing = TRUE)
#print(sorted_cities)
head(sorted_cities, 10)
## 
##           PIACENZA            BOLOGNA              PARMA REGGIO NELL EMILIA 
##                340                258                253                203 
##             RIMINI             CESENA            FERRARA             MODENA 
##                201                183                148                137 
##            RAVENNA         CESENATICO 
##                124                 88
tail(sorted_cities, 10)
## 
##                 LAGOSANTO                    LOIANO              MASI TORELLO 
##                         1                         1                         1 
## MEZZANI - SORBOLO MEZZANI                MODIGLIANA                   MONTESE 
##                         1                         1                         1 
##                  MONTIANO        ROCCA SAN CASCIANO                   TORNOLO 
##                         1                         1                         1 
##                     ZERBA 
##                         1
# Extract the names of the top and bottom cities
top_cities <- names(sorted_cities)[1:10]
bottom_cities <- names(sorted_cities)[(length(sorted_cities) - 9):length(sorted_cities)]
#print(top_cities)
#print(bottom_cities)

# Filter the data for the top and bottom cities
top_data <- dae[dae$Città %in% top_cities, ]
bottom_data <- dae[dae$Città %in% bottom_cities, ]
# Barplot for top cities
barplot(table(top_data$Città), main = 'Top 10 Cities with Most DAE', col = "forestgreen", las = 1, cex.names = 0.4)

# Barplot for bottom cities
barplot(table(bottom_data$Città), main = '10 Cities with Less DAE', col = "orangered", las = 2, cex.names = 0.4)

# devide geopoint into latutidue + longitude
dae_coord <- separate(dae, 
                      Geo.Point, 
                      into = c("Latitude", "Longitude"),
                      sep = ", ")
head(dae_coord)
# Convert the dataset to an 'sf' object and specify CRS
dae_wgs <- st_as_sf(dae_coord, 
                      coords = c("Longitude", "Latitude"),
                      crs = 4326)  # 3857 EPSG code for WGS84 (standard for lat/long)
head(dae_wgs)
#print(dae_wgs)

Map where devices are located

ggplot(dae_wgs) +
  geom_sf(color = "forestgreen", alpha = .5, size = .3) +
  labs(title = "Single Regional Register of Defibrillators - AED") +
  theme_minimal() +
  coord_sf(expand = FALSE)

Map Emilia-Romagna municipalities borders

## import shapefile - SpatialData
municipalities <- sf::st_read("V_COM_GPG_3.shx")
## Reading layer `V_COM_GPG_3' from data source 
##   `/Users/GianmarcoSantoro/miniconda3/Projects/GeoSpacial/V_COM_GPG_3.shx' 
##   using driver `ESRI Shapefile'
## Simple feature collection with 330 features and 0 fields
## Geometry type: MULTIPOLYGON
## Dimension:     XY
## Bounding box:  xmin: 9.198528 ymin: 43.73088 xmax: 12.75532 ymax: 45.13901
## CRS:           NA
#print(municipalities)
#summary(municipalities)

# sistema di riferimento geografico: WGS84 - UTM ZONE 32N
# con il pacchetto "sf" si possono convertire i CRS
# Set the correct CRS, EPSG:4326 to WGS84
st_crs(municipalities) <- st_crs("EPSG:4326")

# Convert the dataset to an 'sf' object and specify CRS
municipalities_coord <- st_as_sf(municipalities, 
                      coords = c("Longitude", "Latitude"),
                      crs = 4326) 

print(municipalities_coord)
## Simple feature collection with 330 features and 0 fields
## Geometry type: MULTIPOLYGON
## Dimension:     XY
## Bounding box:  xmin: 9.198528 ymin: 43.73088 xmax: 12.75532 ymax: 45.13901
## Geodetic CRS:  WGS 84
## First 10 features:
##                          geometry
## 1  MULTIPOLYGON (((10.86582 44...
## 2  MULTIPOLYGON (((12.6792 43....
## 3  MULTIPOLYGON (((9.888571 45...
## 4  MULTIPOLYGON (((9.966079 45...
## 5  MULTIPOLYGON (((10.074 44.9...
## 6  MULTIPOLYGON (((12.14862 43...
## 7  MULTIPOLYGON (((11.43073 44...
## 8  MULTIPOLYGON (((10.75838 44...
## 9  MULTIPOLYGON (((11.80945 44...
## 10 MULTIPOLYGON (((9.447531 44...
#summary(municipalities_coord)

ggplot(municipalities_coord) +
  geom_sf(color = "darkgray", alpha = .5, size = .3) +
  labs(title = "Municipalities") +
  theme_minimal() +
  coord_sf(expand = FALSE)

Check of reference systems are same

st_crs(dae_wgs) == st_crs(municipalities_coord)
## [1] TRUE

Map DAE devices and municipalities

# Plot of devices and municipalities
ggplot() +
  geom_sf(data = municipalities_coord, color = "darkgray", alpha = 0.5, size = 0.3) +
  geom_sf(data = dae_wgs, color = "forestgreen", alpha = 0.5, size = 0.3) +
  labs(title = "Defibrillators in Emilia-Romagna") +
  theme_minimal() +
  coord_sf(expand = FALSE)

We can notice that 2 devices are located in the Adriatic sea, as well as San Marino’s ones are shown. The ones in the sea can be in islands or gas platforms or in other off-shore systems.

Spatial Point Patterns Analysis

library(spatstat)
## Loading required package: spatstat.data
## Loading required package: spatstat.geom
## spatstat.geom 3.1-0
## Loading required package: spatstat.random
## spatstat.random 3.1-4
## Loading required package: spatstat.explore
## Loading required package: nlme
## 
## Attaching package: 'nlme'
## The following object is masked from 'package:dplyr':
## 
##     collapse
## spatstat.explore 3.1-0
## Loading required package: spatstat.model
## Loading required package: rpart
## spatstat.model 3.2-1
## Loading required package: spatstat.linnet
## spatstat.linnet 3.0-6
## 
## spatstat 3.0-3 
## For an introduction to spatstat, type 'beginner'
library(splancs) # to compare kernel estimations
## 
## Spatial Point Pattern Analysis Code in S-Plus
##  
##  Version 2 - Spatial and Space-Time analysis
## 
## Attaching package: 'splancs'
## The following object is masked from 'package:dplyr':
## 
##     tribble
## The following object is masked from 'package:tidyr':
## 
##     tribble
## The following object is masked from 'package:tibble':
## 
##     tribble
head(dae_wgs)

Use projection 3003 to have better insights in meters and not degrees

# EPSG:4326 for WGS 84 and EPSG:3003 for Italian projection from Monte Mario reference point
dae_proj1 <- st_transform(dae_wgs, crs = 3003)
print(dae_proj1)
## Simple feature collection with 5215 features and 2 fields
## Geometry type: POINT
## Dimension:     XY
## Bounding box:  xmin: 1522642 ymin: 4851522 xmax: 1800810 ymax: 4994375
## Projected CRS: Monte Mario / Italy zone 1
## First 10 features:
##                                         Nome               Città
## 1                      COOP SOCIALE BUCANEVE               BARDI
## 2                     Assicurazioni Generali              RIMINI
## 3                          Bar Dovesi Rimini              RIMINI
## 4                       Tennis Club Riccione            RICCIONE
## 5        Liceo Scientifico Einstein Palestra              RIMINI
## 6                            CURIA VESCOVILE               FORLI
## 7  Stadio del nuoto Centro Sportivo Riccione            RICCIONE
## 8                    BODY LIFE 2.0 S.S.D.R.L CASTELNUOVO RANGONE
## 9                     Chiesa di Sant'Antonio SALSOMAGGIORE TERME
## 10                   A.S.D. Bedoniese United             BEDONIA
##                   geometry
## 1  POINT (1557844 4942194)
## 2  POINT (1785075 4881710)
## 3  POINT (1785827 4884659)
## 4  POINT (1791947 4878554)
## 5  POINT (1787171 4883532)
## 6  POINT (1742442 4901098)
## 7  POINT (1791651 4878730)
## 8  POINT (1653887 4935403)
## 9  POINT (1578199 4963594)
## 10 POINT (1550200 4928683)
municipalities_proj <- st_transform(municipalities_coord, crs = 3003)
print(municipalities_proj)
## Simple feature collection with 330 features and 0 fields
## Geometry type: MULTIPOLYGON
## Dimension:     XY
## Bounding box:  xmin: 1515770 ymin: 4846959 xmax: 1801305 ymax: 4998818
## Projected CRS: Monte Mario / Italy zone 1
## First 10 features:
##                          geometry
## 1  MULTIPOLYGON (((1648239 493...
## 2  MULTIPOLYGON (((1795348 487...
## 3  MULTIPOLYGON (((1570027 498...
## 4  MULTIPOLYGON (((1576002 499...
## 5  MULTIPOLYGON (((1584799 497...
## 6  MULTIPOLYGON (((1753181 485...
## 7  MULTIPOLYGON (((1694092 490...
## 8  MULTIPOLYGON (((1639384 494...
## 9  MULTIPOLYGON (((1723832 491...
## 10 MULTIPOLYGON (((1535488 494...
# Extracting coordinates into separate columns, devide geometry into latutidue + longitude
coordinates <- st_coordinates(dae_proj1)
dae_proj <- cbind(dae_proj1$Nome, dae_proj1$Città, coordinates)

# Rename the columns
colnames(dae_proj) <- c("Nome", "Città", "Longitude", "Latitude")
head(dae_proj)
##      Nome                                  Città      Longitude         
## [1,] "COOP SOCIALE BUCANEVE"               "BARDI"    "1557844.23150378"
## [2,] "Assicurazioni Generali"              "RIMINI"   "1785075.14596705"
## [3,] "Bar Dovesi Rimini"                   "RIMINI"   "1785826.93548339"
## [4,] "Tennis Club Riccione"                "RICCIONE" "1791946.69377474"
## [5,] "Liceo Scientifico Einstein Palestra" "RIMINI"   "1787170.52588819"
## [6,] "CURIA VESCOVILE"                     "FORLI"    "1742442.10838033"
##      Latitude          
## [1,] "4942193.79963972"
## [2,] "4881710.22510532"
## [3,] "4884658.81470574"
## [4,] "4878553.52190728"
## [5,] "4883531.90980742"
## [6,] "4901097.93272644"
dae_small <- dae_proj
summary(dae_small)
##      Nome              Città            Longitude           Latitude        
##  Length:5215        Length:5215        Length:5215        Length:5215       
##  Class :character   Class :character   Class :character   Class :character  
##  Mode  :character   Mode  :character   Mode  :character   Mode  :character
# Ensure dae_small is a dataframe
dae_small <- as.data.frame(dae_small)

# Convert Longitude and Latitude columns to numeric
dae_small$Longitude <- as.numeric(dae_small$Longitude)
dae_small$Latitude <- as.numeric(dae_small$Latitude)

# Round Longitude and Latitude to 5 decimal places
dae_small$Longitude <- round(dae_small$Longitude, 5)
dae_small$Latitude <- round(dae_small$Latitude, 5)

# duplicated_small = duplicated(dae_small)
# double_small = subset(dae_small, duplicated_small) 
# print(double_small)

# Remove duplicates
dae_small <- unique(dae_small)

# Check the first few rows to confirm the changes
summary(dae_small)
##      Nome              Città             Longitude          Latitude      
##  Length:5215        Length:5215        Min.   :1522642   Min.   :4851522  
##  Class :character   Class :character   1st Qu.:1607806   1st Qu.:4917708  
##  Mode  :character   Mode  :character   Median :1656938   Median :4940888  
##                                        Mean   :1664463   Mean   :4937971  
##                                        3rd Qu.:1718146   3rd Qu.:4963326  
##                                        Max.   :1800810   Max.   :4994375
head(dae_small)

Considered rectangular area containing location of devices

x_range <- range(dae_small$Longitude)
y_range <- range(dae_small$Latitude) 
xy_area <- owin(x_range, y_range)

# Transform into ppp (point pattern dataset) to show info in the area of interest
dae_ppp = ppp(dae_small$Longitude, dae_small$Latitude, window = xy_area)  # warning: duplicated points
## Warning: data contain duplicated points
summary(dae_ppp)
## Planar point pattern:  5215 points
## Average intensity 1.312378e-07 points per square unit
## 
## *Pattern contains duplicated points*
## 
## Coordinates are given to 1 decimal place
## i.e. rounded to the nearest multiple of 0.1 units
## 
## Window: rectangle = [1522641.6, 1800810.1] x [4851522, 4994375] units
##                     (278200 x 142900 units)
## Window area = 3.9737e+10 square units

CONTROLLA BENE QUESTA PARTE

# Dato che ci possono essere delle città in cui c'è stato più di un evento,
# "sposto" leggermente i punti "duplicati"
dae_ppp_r <- dae_ppp[!duplicated(dae_ppp)]
dup_j <- rjitter(dae_ppp[duplicated(dae_ppp)], radius=0.01, retry=TRUE, nsim=1, drop=TRUE)

#riaggiungo i punti in locations molto vicine, evitando che i nuovi siano sovrapposti
dae_ppp_j <- superimpose(dae_ppp_r,dup_j) 
dae_ppp_originali <- dae_ppp
dae_ppp <- dae_ppp_j
plot(dae_ppp_originali, cols='forestgreen',  cex=0.3, main = "Point Pattern Plot") 
plot(dup_j, pch=2, cols='brown', add=T, cex=0.6)
plot(municipalities_proj, border = "darkgray", add=T, lwd = 0.3)

# Create a legend for the plot
legend("bottomleft", 
       legend = c("ppp", "Duplicates"),
       col = c("forestgreen", "brown"), 
       pch = c(1, 2), 
       cex = 0.5)

# In genere con i PPP è bene non avere due punti esattamente uguali...
# "when the data has coincidence points, some statistical procedures will be severely affected. 
# So it is always strongly advisable to check for duplicate points and to decide on a strategy 
# for dealing with them if they are present” (Baddeley et al., 2016: p.60).

First Order Effects

Kernel Density Estimate: estimate the intensity of the process, with a kernel estimation

k500 = density(dae_ppp, sigma = 500, dimyx = 500)
plot(k500)
#plot(dae_proj1$geometry, cex=0.01, add=T)
plot(municipalities_proj, border = "darkgray", add=T, lwd = 0.1)

We can see in Piacenza, Modena, Parma, Cesena, Rimini some high values, but still not so informative

Select an optimal bandwidth with Likelihood Cross Validation critirion

# Setting the seed for reproducibility
set.seed(23)

# Calculate the bandwidth for the point pattern
sigma_ppl <- bw.ppl(dae_ppp)
print(sigma_ppl)
##    sigma 
## 961.3408
kopt = density(dae_ppp, sigma = sigma_ppl, dimyx = 500)
plot(kopt)
#plot(dae_proj1$geometry, col = "forestgreen" , add=T, cex=0.01)
plot(municipalities_proj, border = "darkgray", add=T, lwd = 0.1)

Try some more values

k2000 = density(dae_ppp, sigma = 2000, dimyx = 500)
plot(k2000)
plot(dae_proj1$geometry, col = "forestgreen", add=T, cex=0.01)
plot(municipalities_proj, border = "darkgray", add=T, lwd = 0.1)

k5000 = density(dae_ppp, sigma = 5000, dimyx = 500)
plot(k5000)
plot(dae_proj1$geometry, col = "forestgreen", add=T, cex=0.01)
plot(municipalities_proj, border = "darkgray", add=T, lwd = 0.1)

From my point of view this is the most informative plot.

k10000 = density(dae_ppp, sigma = 10000, dimyx = 500)
plot(k10000)
plot(dae_proj1$geometry, col = "forestgreen", add=T, cex=0.01)
plot(municipalities_proj, border = "darkgray", add=T, lwd = 0.1)

Save all output as SpatialGrid, for differnt values of sigma in kernel density

SG <- as(k500, "SpatialGridDataFrame")
SG <- cbind(SG,as(kopt, "SpatialGridDataFrame"))
SG <- cbind(SG,as(k2000, "SpatialGridDataFrame"))
SG <- cbind(SG,as(k5000, "SpatialGridDataFrame"))
SG <- cbind(SG,as(k10000, "SpatialGridDataFrame"))

names(SG) <- c("k500","kopt","k2000","k5000","k10000")

With some plots of this kind, k500 and kopt show better granularity of this process

spplot(SG, c("k500","kopt"),col.regions=terrain.colors(11))

# fai una prova con un kernel diverso tipo quadratico tipo qui?
#kq_ppl <- density(casi_ppp,sigma=sigma_ppl,kernel="quartic",xy=griglia)

# Show results
summary(as.data.frame(SG)[,1:5])
##       k500                kopt               k2000          
##  Min.   :0.000e+00   Min.   :0.000e+00   Min.   :0.000e+00  
##  1st Qu.:0.000e+00   1st Qu.:0.000e+00   1st Qu.:0.000e+00  
##  Median :0.000e+00   Median :1.550e-10   Median :1.759e-08  
##  Mean   :1.312e-07   Mean   :1.312e-07   Mean   :1.314e-07  
##  3rd Qu.:3.930e-09   3rd Qu.:7.578e-08   3rd Qu.:1.212e-07  
##  Max.   :4.276e-05   Max.   :2.311e-05   Max.   :9.253e-06  
##      k5000               k10000         
##  Min.   :0.000e+00   Min.   :0.000e+00  
##  1st Qu.:3.646e-10   1st Qu.:8.066e-09  
##  Median :4.296e-08   Median :6.200e-08  
##  Mean   :1.321e-07   Mean   :1.327e-07  
##  3rd Qu.:1.585e-07   3rd Qu.:1.924e-07  
##  Max.   :2.405e-06   Max.   :1.040e-06

CSR simulated process, spacial random process following binomial distribution

CSR <-runifpoint(dae_ppp$n, win = dae_ppp$window)

plot(dae_ppp, cex=0.1, main= "DAE positions vs Random DAE (red)")
plot(CSR, add=T, col='red', pch=20, cex=0.1)
plot(municipalities_proj, border = "darkgray", add=T, lwd = 0.5)

There is regular pattern of clusters, differently from the random representation in red. Clusters are in correspondece to main city centers and Riviera Romagnola, where in summer there is a significant increase in people prensence.

G function: event-event distance

Nearest neighbour distance

nns <- nndist(dae_ppp)
#summary(nns)
plot(ecdf(nns))  # empirical cumulative distribution function

It climbs very steeply in the early part of its range before flattening out, then the indication would be an observed probability of short as opposed to long nearest neighbour distances, which suggest inter-event interaction (clustering) In about 5 km quite every DAE has a neighbor.

Gfunc <- Gest(dae_ppp)
Gfunc2 <- Gest(dae_ppp,correction = "none")
plot(Gfunc,lwd=2)

Might be low informative in this study considering the #equivale alle G-function senza nessuna correzione per i bordi # climbs very steeply in the early part of its range before flattening out, then the indication would # be an observed probability of short as opposed to long nearest neighbour distances, which suggest inter-event # interaction (clustering). in caso contrario ci sarebbe stata repulsione (che magari c’è ad una scala più piccola, # tipo di 1-2 km in caso di presenza di un altro dispositivo tipo nella stessa strada)

#calculates the nearest neighbor distances for each point in the dae_ppp point pattern, # provides a summary of these distances, and then plots the empirical cumulative distribution function (ECDF) # to visualize the spatial pattern of the points. This helps in understanding whether the points exhibit clustering, # regularity, or randomness in their distribution

In this analysis G function is not so informative, since the distance between the nearest DAE can be very high

in city centers and low in montains, but it could be more informative to see the distance between a random point and

the nearest DAE in case of need (F function)!

#confronto con la distribuzione CSR (= complete spatial randomness = Processo di Poisson omogeneo)
ex <- expression(runifpoint(dae_ppp$n, win = dae_ppp$window))
resG <- envelope(dae_ppp, Gest, nsim = 99, simulate = ex,
                 verbose = FALSE, saveall = TRUE)
plot(resG)
plot(Gfunc,add=T)

## Funzione F: distanza punto-evento
?Fest
Ffunc <- Fest(dae_ppp)
plot(Ffunc,lwd=2)

#confronto con la distribuzione CSR (= complete spatial randomness = Processo di Poisson omogeneo)
resF <- envelope(dae_ppp, Fest, nsim = 99, simulate = ex,
                 verbose = FALSE, saveall = TRUE)
plot(resF)

#confronto F_obs - G_obs
#per fare il confronto le funzioni devono essere calcolare per gli stessi valori di distanza
dist <- seq(0,12000, 0.00258)
Foss <- Fest(dae_ppp,r=dist)
Goss <- Gest(dae_ppp,r=dist)

plot(Foss$rs,Goss$rs,xlim=c(0,1),ylim=c(0,1),type='l',xlab="F",ylab="G")  #disegno le stime corrette per il bordo (border method or 

# "reduced sample" estimator)
abline(a=0,b=1,col='red')

# Anche in questo caso, la linea retta con angolo a 45 gradi indica la CSR, quindi ci aspetteremmo che la
# curva osservata vi si avvicini nel caso in cui il processo fosse random. Invece, sarrebbe sopra la linea in 
# caso di clustering (come osserviamo) e sotto in caso di regolarità. In questo caso, il clustering sembra 
# piuttosto sostanziale.
## Funzione K
?Kest
Kfunc <- Kest(dae_ppp)
## number of data points exceeds 3000 - computing border correction estimate only
plot(Kfunc,lwd=2)

resK <- envelope(dae_ppp, Kest, nsim = 9, simulate = ex,
                 verbose = FALSE, saveall = TRUE)
plot(resK)

#  La K function ci fornisce la distanza di un evento con ogni singolo altro evento e non solo col suo vicino più vicino.
# Anche qui il clustering è evidente, dato che si discosta di parecchio dalla linea rossa, che è la CSR (poisson).
## Funzione L (trasformazione della Ripley's K-function)
?Lest
Lfunc <- Lest(dae_ppp)
## number of data points exceeds 3000 - computing border correction estimate only
plot(Lfunc,lwd=2)

resL <- envelope(dae_ppp, Lest, nsim = 9, simulate = ex,
                 verbose = FALSE, saveall = TRUE)
plot(resL)

# Anche in questo caso, L(r) > r implica il clustering, che è ciò che osserviamo.
# Il felsso perchè aumentando la distanza c'è più regolarità?